import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import pickle
Conditions
| Model | FiveVTS | Chickenpox | Pedalme | Wikimath | Windmillsmall | MontevideoBus |
|---|---|---|---|---|---|---|
| Max iter. | 30 | 30 | 30 | 30 | 30 | 30 |
| Epochs | 50 | 50 | 50 | 50 | 50 | 50 |
| Lags | 2 | 4 | 4 | 8 | 8 | 4 |
| Interpolation | linear | linear | nearest | linear | linear | nearest |
| Filters | ||||||
| GConvGRU | 12 | 16 | 12 | 12 | 12 | 12 |
| GConvLSTM | 12 | 32 | 2 | 64 | 16 | 12 |
| GCLSTM | 4 | 16 | 4 | 64 | 16 | 12 |
| LRGCN | 4 | 8 | 8 | 32 | 12 | 2 |
| DyGrEncoder | 12 | 12 | 12 | 12 | 12 | 12 |
| EvolveGCNH | No need | No need | No need | No need | No need | No need |
| EvolveGCNO | No need | No need | No need | No need | No need | No need |
| TGCN | 12 | 12 | 12 | 12 | 12 | 8 |
| DCRNN | 2 | 16 | 8 | 12 | 4 | 12 |
Import
Data
# Load the tidied experiment results: one row per run, with dataset, model,
# missing rate/type, hyper-parameters, MSE and wall-clock time columns.
df = pd.read_csv('../Data/df_fig.csv')
df.iloc[:,1:].head()
| dataset | method | mrate | mtype | lags | nof_filters | inter_method | epoch | mse | calculation_time | model | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | fivenodes | STGCN | 0.0 | NaN | 2 | 12.0 | NaN | 50.0 | 0.729374 | 80.985221 | GConvGRU |
| 1 | fivenodes | STGCN | 0.0 | NaN | 2 | 12.0 | NaN | 50.0 | 0.729082 | 80.891788 | GConvGRU |
| 2 | fivenodes | STGCN | 0.7 | rand | 2 | 12.0 | linear | 50.0 | 1.892262 | 81.976547 | GConvGRU |
| 3 | fivenodes | STGCN | 0.7 | rand | 2 | 12.0 | nearest | 50.0 | 2.211288 | 87.803869 | GConvGRU |
| 4 | fivenodes | STGCN | 0.8 | rand | 2 | 12.0 | linear | 50.0 | 2.072818 | 103.648742 | GConvGRU |
1
Fivenodes 데이터 셋의 GConvGRU 모델에서 데이터의 결측값이 증가할수록 Classic 모델(여기선GConvGRU)에 비해 Proposed 모델의 error값이 확연히 느리게 증가하는 현상
추세선은 missing rate 0.3과 0.8에서 mse 총 평균으로 그렸음
내용
- First approach is on Figure \(\ref{}\). That showed the result using
FiveVTS dataset and GConvGRU model by each missing rate. As the proportion of missing values gradually increases, there is a tendency for the MSE to rise. Especially, we compared values between the Classic method (GConvGRU) and the Proposed method. Both models indicate a similar trend; however, the MSE for the Classic method exhibits a rapid increase. Conversely, the MSE trend for the Proposed method shows a slower increase compared to the Classic approach.
- First approach is on Figure \(\ref{}\). That showed the result using
# --- Figure 1: GConvGRU on FiveVTS by missing rate, with a mean trend line -----
# df.query already returns a new frame, so the original pd.concat([...]) wrapper
# around a single query was redundant; .copy() keeps the column assignment
# below free of SettingWithCopy warnings.
tidydata = df.query('dataset=="fivenodes" and model=="GConvGRU" and nof_filters==12 and lags==2 and inter_method=="linear" and epoch==50 and mtype=="rand"').copy()
tidydata['mrate'] = tidydata['mrate'].astype(str)  # treat missing rate as discrete x categories
tidydata = tidydata.sort_values(by=['model', 'mrate'])
# width=70 here is superseded by update_layout(width=1900) below.
fig = px.box(tidydata, x='mrate', y='mse', color='method', width=70, log_y=True, facet_col='model')
fig.layout['annotations'][0]['text'] = ''  # drop the "model=GConvGRU" facet label
fig.layout['xaxis']['title']['text'] = 'Missing Rate'
fig.layout['yaxis']['title']['text'] = 'Mean Square Error'
# Trace 0 is STGCN (shown as "Classic"), trace 1 IT-STGCN (shown as "Proposed").
fig.data[0]['marker']['color'] = 'blue'
fig.data[0]['name'] = 'Classic'
fig.data[1]['marker']['color'] = 'red'
fig.data[1]['name'] = 'Proposed'
fig.layout['legend']['title'] = 'Method'
fig.update_layout(template="seaborn")
# Fixed title typo from the original ("Vales" -> "Values").
fig.update_layout(title_text="Randomly Missing Values on FiveVTS")
fig.update_layout(height=600, width=1900)
# Hidden overlay x-axis spanning [0, 1] lets the trend line be positioned in
# normalized coordinates, independent of the categorical mrate axis.
fig.layout.xaxis2 = go.layout.XAxis(overlaying='x', range=[0, 1], showticklabels=False)
# Dash-dot trend line connecting the mean MSE at mrate 0.3 and at mrate 0.8.
fig.add_scatter(x=[0.1, 0.901],
                y=[(tidydata.query('mrate=="0.3"'))['mse'].mean(),
                   (tidydata.query('mrate=="0.8"'))['mse'].mean()],
                mode='lines', xaxis='x2',
                showlegend=False, line=dict(dash='dashdot', color="black", width=2))
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
))
# fig.update_layout(title_text="FiveVTS")
# # with open('Figures/실험1모델비교F1.pkl','wb') as f:
# #     pickle.dump(fig,f)
fig
2
전개: GConvGRU 모델만이 아니라 다른 모델에서도 MSE가 Classic에 비해 Proposed가 낮게 나오는 trend
- 내용 According to Figure 111, we checked the tendency of MSE between the Classic and Proposed methods while the missing rate is increasing. We investigated whether this trend still holds for other classic models. Figure 222 depicts a similar pattern to Figure 111, indicating that as the missing rate increases, the MSE for the Proposed method outperforms that of the Classic method.
# --- Figure 2: all nine classic models on FiveVTS, by missing rate -------------
# Per-model filter counts follow the tuning table at the top of this file;
# EvolveGCNH / EvolveGCNO take no filter hyper-parameter, hence None.
_model_filters = [
    ('GConvGRU', 12), ('GConvLSTM', 12), ('GCLSTM', 4), ('LRGCN', 4),
    ('DyGrEncoder', 12), ('EvolveGCNH', None), ('EvolveGCNO', None),
    ('TGCN', 12), ('DCRNN', 2),
]
# Conditions shared by every model: FiveVTS, random missing, linear
# interpolation, 2 lags, 50 epochs, and the five plotted missing rates.
_base = ((df['dataset'] == 'fivenodes') & (df['mtype'] == 'rand')
         & (df['inter_method'] == 'linear') & (df['lags'] == 2)
         & (df['epoch'] == 50) & (df['mrate'].isin([0.3, 0.5, 0.6, 0.7, 0.8])))
_parts = []
for _model, _filters in _model_filters:
    _mask = _base & (df['model'] == _model)
    if _filters is not None:
        _mask = _mask & (df['nof_filters'] == _filters)
    _parts.append(df[_mask])
tidydata = pd.concat(_parts)
# Ordered categoricals fix the facet order (models) and legend order (methods).
tidydata['model'] = pd.Categorical(tidydata['model'], categories=["GConvGRU", "GConvLSTM", "GCLSTM", "LRGCN", "DyGrEncoder", "EvolveGCNH", "EvolveGCNO", "TGCN", "DCRNN"])
tidydata['method'] = pd.Categorical(tidydata['method'], categories=['STGCN', 'IT-STGCN'])
tidydata['mrate'] = tidydata['mrate'].astype(str)  # discrete x-axis categories
tidydata = tidydata.sort_values(by=['model', 'mrate'])
fig = px.box(tidydata, x='mrate', y='mse', color='method', width=70, log_y=True,
             facet_col='model', facet_col_wrap=3)
# Keep one shared x title (bottom-row middle facet) and one y title (middle row).
fig.layout['xaxis']['title']['text'] = ''
fig.layout['xaxis2']['title']['text'] = 'Missing Rate'
fig.layout['xaxis3']['title']['text'] = ''
fig.layout['yaxis']['title']['text'] = ''
fig.layout['yaxis4']['title']['text'] = 'MSE(log scale)'
fig.layout['yaxis7']['title']['text'] = ''
fig.layout['legend']['title'] = 'Method'
# Show tick labels on every facet, not just the outer ones.
fig.layout.xaxis4.showticklabels = True
fig.layout.xaxis5.showticklabels = True
fig.layout.xaxis6.showticklabels = True
fig.layout.xaxis7.showticklabels = True
fig.layout.xaxis8.showticklabels = True
fig.layout.xaxis9.showticklabels = True
fig.layout.yaxis2.showticklabels = True
fig.layout.yaxis3.showticklabels = True
fig.layout.yaxis5.showticklabels = True
fig.layout.yaxis6.showticklabels = True
fig.layout.yaxis8.showticklabels = True
fig.layout.yaxis9.showticklabels = True
# px.box emits one trace per (method, model facet): the first nine are STGCN
# ("Classic"), the remainder IT-STGCN ("Proposed"). The original hard-coded
# range(9, 17), which skipped the final Proposed trace and left it uncolored;
# iterating to len(fig.data) recolors them all.
for i in range(0, 9):
    fig.data[i]['marker']['color'] = 'blue'
    fig.data[i]['name'] = 'Classic'
for i in range(9, len(fig.data)):
    fig.data[i]['marker']['color'] = 'red'
    fig.data[i]['name'] = 'Proposed'
fig.update_layout(legend=dict(x=1, y=1, traceorder='normal', orientation='v'))
fig.update_layout(template="seaborn")
fig.update_layout(title_text="Models on FiveVTS")
fig.update_layout(height=1200, width=1900)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
))
fig
# with open('fivenodes_fig.pkl', 'wb') as file:
# pickle.dump(fig, file)
# with open('fivenodes_fig.pkl', 'rb') as file:
# fivenodes_fig = pickle.load(file)
# fivenodes_fig
3
Missing rate이 높을 떄(70% 혹은 80%), GConvGRU를 예로 들어 데이터 셋마다 모두 Proposed method의 mse가 더 낮은 것 확인
- 내용
- We already showed that the MSE for the Proposed method is mostly lower than the Classic method when the missing rate is high. In detail, this section sets the condition that the missing rate is high (70% or 80%). The model is GConvGRU on Figure 333. All six datasets have a lower MSE for the Proposed method than the Classic one. This leads to the fact that our method would be helpful when we use real-world data, which often has lots of missing values.
# --- Figure 3: GConvGRU across all six datasets at a high missing rate ---------
# mrate 0.8 everywhere except windmillsmall, which uses 0.7 (its highest run).
# Filter/lag/interpolation settings per dataset follow the tuning table above.
tidydata = pd.concat([df[(df['dataset']=='fivenodes') & (df['mtype']=='rand') & (df['inter_method']=='linear') & (df['nof_filters']==12) &
                         (df['lags']==2) & (df['epoch']==50) & (df['model']=='GConvGRU') & (df['mrate'].isin([0.8]))],
                      df.query("dataset=='chickenpox' & mtype=='rand' & inter_method == 'linear' & nof_filters==16 & lags==4 & epoch==50 & model=='GConvGRU' & (mrate == 0.8)"),
                      df.query("dataset=='pedalme' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==4 & epoch==50 & model=='GConvGRU' & (mrate == 0.8)"),
                      df.query("dataset=='wikimath' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==8 & epoch==50 & model=='GConvGRU' & (mrate == 0.8)"),
                      df.query("dataset=='windmillsmall' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==8 & epoch==50 & model=='GConvGRU' & (mrate == 0.7)"),
                      df.query("dataset=='monte' & mtype=='rand' & inter_method == 'nearest' & nof_filters==12 & lags==4 & epoch==50 & model=='GConvGRU' & (mrate == 0.8)")])
tidydata['model'] = pd.Categorical(tidydata['model'], categories=["GConvGRU", "GConvLSTM", "GCLSTM", "LRGCN", "DyGrEncoder", "EvolveGCNH", "EvolveGCNO", "TGCN", "DCRNN"])
# Desired left-to-right facet order for the datasets.
categories=["fivenodes", "chickenpox", "pedalme", "wikimath", "windmillsmall", "monte"]
tidydata['method'] = pd.Categorical(tidydata['method'], categories=['STGCN', 'IT-STGCN'])
# tidydata['mrate'] = tidydata['mrate'].astype(str)
# Sort rows by the manual dataset order above (each name mapped to its index).
tidydata = tidydata.sort_values(by='dataset', key=lambda x: x.map({dataset: i for i, dataset in enumerate(categories)}))
fig = px.box(tidydata,x='mrate',y='mse',color='method',width=70, log_y=True,facet_col='dataset')
# Only the middle facet keeps an x-axis title; only the first keeps the y title.
fig.layout['xaxis']['title']['text']=''
fig.layout['xaxis2']['title']['text']=''
fig.layout['xaxis3']['title']['text']='Missing Rate'
fig.layout['xaxis4']['title']['text']=''
fig.layout['xaxis5']['title']['text']=''
fig.layout['xaxis6']['title']['text']=''
fig.layout['yaxis']['title']['text']='MSE(log scale)'
# Show y tick labels on every facet, not just the leftmost one.
fig.layout.yaxis2.showticklabels=True
fig.layout.yaxis3.showticklabels=True
fig.layout.yaxis4.showticklabels=True
fig.layout.yaxis5.showticklabels=True
fig.layout.yaxis6.showticklabels=True
# Replace the auto-generated facet annotations with display names for the paper.
fig.layout.annotations[0].text = 'FiveVTS'
fig.layout.annotations[1].text = 'Chickenpox'
fig.layout.annotations[2].text = 'Pedalme'
fig.layout.annotations[3].text = 'Wikimath'
fig.layout.annotations[4].text = 'Windmillsmall'
fig.layout.annotations[5].text = 'MontevideoBus'
fig.layout['legend']['title']='Method'
# First six traces are STGCN ("Classic"); the last six IT-STGCN ("Proposed") —
# 6 datasets x 2 methods = 12 traces in total.
for i in range(0, 6):
    fig.data[i]['marker']['color'] = 'blue'
    fig.data[i]['name'] = 'Classic'
for i in range(6, 12):
    fig.data[i]['marker']['color'] = 'red'
    fig.data[i]['name'] = 'Proposed'
# fig.update_layout(legend=dict(x=1, y=1, traceorder='normal', orientation='v'))
fig.update_layout(template="seaborn")
fig.update_layout(title_text="GConvGRU on datasets")
# fig.update_yaxes(matches=None)
fig.update_layout(height=800, width=1900)
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
))
fig
4
GConvGRU 모델의 fivenodes 데이터에서 결측값 비율이 낮을 때 대비 높을 때 비교
결측값 비율이 커지니 차이가 커진 mse 값 분포
내용
- ?
# --- Figure 4: GConvGRU on FiveVTS, low (0.3) vs high (0.8) missing rate -------
tidydata = pd.concat([df[(df['dataset']=='fivenodes') & (df['mtype']=='rand') & (df['inter_method']=='linear') & (df['nof_filters']==12) &
                         (df['lags']==2) & (df['epoch']==50) & (df['model']=='GConvGRU') & (df['mrate'].isin([0.3,0.8]))]])
# # tidydata['model'] = pd.Categorical(tidydata['model'], categories=["GConvGRU", "GConvLSTM", "GCLSTM", "LRGCN", "DyGrEncoder", "EvolveGCNH", "EvolveGCNO", "TGCN", "DCRNN"])
# Ordered categorical fixes the legend order (STGCN first).
tidydata['method'] = pd.Categorical(tidydata['method'], categories=['STGCN', 'IT-STGCN'])
tidydata['mrate'] = tidydata['mrate'].astype(str)  # discrete x-axis categories
tidydata = tidydata.sort_values(by=['model','mrate'])
fig = px.box(tidydata,x='mrate',y='mse',width=70, log_y=True,color='method')
fig.layout['xaxis']['title']['text']='Missing Rate'
fig.layout['yaxis']['title']['text']='MSE(log scale)'
# Trace 0 is STGCN ("Classic"), trace 1 IT-STGCN ("Proposed").
fig.data[0]['marker']['color'] = 'blue'
fig.data[0]['name'] = 'Classic'
fig.data[1]['marker']['color'] = 'red'
fig.data[1]['name'] = 'Proposed'
fig.layout['legend']['title']='Method'
# fig.update_layout(legend=dict(x=1, y=1, traceorder='normal', orientation='v'))
fig.update_layout(template="seaborn")
fig.update_layout(title_text="GConvGRU on FiveVTS")
fig.update_layout(height=800, width=1900)
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
))
fig
5
time이 적은 pedalme 데이터와 time이 많은 windmillsmall 데이터의 결측값 비율이 높을 때(70% 혹은 80%) 비교
학습할 데이터가 적은 pedalme 데이터는 proposed method 가 dramatic한 낮은 error값을 보이지 않았다.
반면에 time이 긴 windmillsmall은 proposed method가 결측값이 많을 때 mse 값이 낮게 나온 모습을 볼 수 있었다.
내용
- According to the table (dataset explanation), the time for the ‘Pedalme’ dataset is 36, while the time for the ‘Windmillsmall’ dataset is 17,472. Thus, the two datasets have different amounts of data. We compared the two datasets, and the result is on Figure 555. The variations observed in Figure 555 for each dataset indicate that the disparity between methods is relatively small for the ‘Pedalme’ dataset, whereas the difference is more pronounced for the ‘Windmillsmall’ dataset. This phenomenon arises as the increasing quantity of data facilitates learning patterns and trends. Consequently, our approach signifies its effectiveness when there is a substantial amount of data in the time aspect.
# --- Figure 5: short series (Pedalme) vs long series (Windmillsmall) -----------
# High missing rate per dataset: 0.8 for pedalme, 0.7 for windmillsmall.
tidydata = pd.concat([
    df.query("dataset=='pedalme' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==4 & epoch==50 & model=='GConvGRU' & (mrate == 0.8)"),
    df.query("dataset=='windmillsmall' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==8 & epoch==50 & model=='GConvGRU' & (mrate == 0.7)")])
tidydata['model'] = pd.Categorical(tidydata['model'], categories=["GConvGRU", "GConvLSTM", "GCLSTM", "LRGCN", "DyGrEncoder", "EvolveGCNH", "EvolveGCNO", "TGCN", "DCRNN"])
tidydata['method'] = pd.Categorical(tidydata['method'], categories=['STGCN', 'IT-STGCN'])
# tidydata['mrate'] = tidydata['mrate'].astype(str)
tidydata = tidydata.sort_values(by=['dataset','model','mrate'])
fig = px.box(tidydata,x='mrate',y='mse',color='method',width=70, log_y=True,facet_col='dataset')
fig.layout['xaxis']['title']['text']=''
fig.layout['yaxis']['title']['text']='MSE(log scale)'
fig.layout['legend']['title']='Method'
fig.layout.xaxis2.title.text=''
# Strip the "dataset=" prefix from the facet titles.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.layout.yaxis2.showticklabels = True
# Traces 0-1 are STGCN per facet ("Classic"); traces 2-3 IT-STGCN ("Proposed").
fig.data[0]['marker']['color'] = 'blue'
fig.data[0]['name'] = 'Classic'
fig.data[1]['marker']['color'] = 'blue'
fig.data[1]['name'] = 'Classic'
fig.data[2]['marker']['color'] = 'red'
fig.data[2]['name'] = 'Proposed'
fig.data[3]['marker']['color'] = 'red'
fig.data[3]['name'] = 'Proposed'
# fig.update_layout(legend=dict(x=1, y=1, traceorder='normal', orientation='v'))
fig.update_layout(template="seaborn")
fig.update_layout(title_text="")
fig.update_layout(height=800, width=1800)
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
))
fig
6
points로 missing rate이 증가할 수록 mse trend를 mehod별로 비교
- classic에 비해 proposed의 mse trend 계수가 낮음
# FiveVTS / GConvGRU runs (random missing, linear interpolation). The original
# pd.concat([...]) around a single query was redundant; .copy() keeps later
# column assignments warning-free.
tidydata = df.query('dataset=="fivenodes" and model=="GConvGRU" and nof_filters==12 and lags==2 and inter_method=="linear" and epoch==50 and mtype=="rand"').copy()
tidydata = tidydata.sort_values(by=['model', 'mrate'], ascending=[True, True])
def assign_class_order(data):
    """Attach a 1-based within-method repetition counter as 'class_order'.

    Mutates *data* in place and returns it, so it can be used with
    DataFrame.groupby(...).apply.
    """
    per_method_index = data.groupby('method').cumcount()
    data['class_order'] = per_method_index + 1
    return data
# group_keys=False silences the pandas FutureWarning about prepending group
# keys to the apply result; the output is unchanged because the index is
# reset immediately afterwards.
tidydata = tidydata.groupby('model', group_keys=False).apply(assign_class_order).reset_index(drop=True)
# Repetition index on x, MSE on y, missing rate as color; an OLS trend line
# per facet compares the slopes of the two methods.
fig = px.scatter(tidydata, x='class_order', y="mse", color='mrate', trendline="ols",
                 trendline_color_override="grey", facet_col='method',
                 color_continuous_scale=px.colors.sequential.Bluered)
# Strip "method=" from the facet titles, then rename to paper wording.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.layout.annotations[0].text = 'Classic'
fig.layout.annotations[1].text = 'Proposed'
fig.layout.xaxis.title.text = ''
fig.layout.xaxis2.title.text = ''
fig.layout.yaxis2.showticklabels = True
fig.update_layout(template="seaborn")
fig.update_layout(title_text="")
fig.update_layout(height=600, width=1800)
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
))
fig
FutureWarning:
Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use
>>> .groupby(..., group_keys=False)
To adopt the future behavior and silence this warning, use
>>> .groupby(..., group_keys=True)
# Chickenpox / GConvGRU runs. The original query repeated mtype=='rand' twice
# (mixing '&' and 'and') and wrapped a single query in pd.concat; both removed.
tidydata = df.query("dataset=='chickenpox' & mtype=='rand' & inter_method == 'linear' & nof_filters==16 & lags==4 & epoch==50 & model=='GConvGRU'").copy()
tidydata = tidydata.sort_values(by=['model', 'mrate'], ascending=[True, True])

def assign_class_order(data):
    """Attach a 1-based within-method repetition counter as 'class_order'."""
    data['class_order'] = data.groupby('method').cumcount() + 1
    return data

# group_keys=False silences the pandas FutureWarning; result unchanged since
# the index is reset right after.
tidydata = tidydata.groupby('model', group_keys=False).apply(assign_class_order).reset_index(drop=True)
fig = px.scatter(tidydata, x='class_order', y="mse", color='mrate', trendline="ols",
                 trendline_color_override="grey", facet_col='method',
                 color_continuous_scale=px.colors.sequential.Bluered)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.layout.annotations[0].text = 'Classic'
fig.layout.annotations[1].text = 'Proposed'
fig.layout.xaxis.title.text = ''
fig.layout.xaxis2.title.text = ''
fig.layout.yaxis2.showticklabels = True
fig.update_layout(template="seaborn")
fig.update_layout(title_text="")
fig.update_layout(height=600, width=1800)
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
))
fig
FutureWarning:
Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use
>>> .groupby(..., group_keys=False)
To adopt the future behavior and silence this warning, use
>>> .groupby(..., group_keys=True)
# Pedalme / GConvGRU runs. Removed the duplicated mtype=='rand' clause and the
# redundant pd.concat around a single query.
tidydata = df.query("dataset=='pedalme' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==4 & epoch==50 & model=='GConvGRU'").copy()
tidydata = tidydata.sort_values(by=['model', 'mrate'], ascending=[True, True])

def assign_class_order(data):
    """Attach a 1-based within-method repetition counter as 'class_order'."""
    data['class_order'] = data.groupby('method').cumcount() + 1
    return data

# group_keys=False silences the pandas FutureWarning; result unchanged since
# the index is reset right after.
tidydata = tidydata.groupby('model', group_keys=False).apply(assign_class_order).reset_index(drop=True)
fig = px.scatter(tidydata, x='class_order', y="mse", color='mrate', trendline="ols",
                 trendline_color_override="grey", facet_col='method',
                 color_continuous_scale=px.colors.sequential.Bluered)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.layout.annotations[0].text = 'Classic'
fig.layout.annotations[1].text = 'Proposed'
fig.layout.yaxis2.showticklabels = True
fig.layout.xaxis.title.text = ''
fig.layout.xaxis2.title.text = ''
fig.update_layout(template="seaborn")
fig.update_layout(title_text="")
fig.update_layout(height=600, width=1800)
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
))
fig
FutureWarning:
Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use
>>> .groupby(..., group_keys=False)
To adopt the future behavior and silence this warning, use
>>> .groupby(..., group_keys=True)
# Wikimath / GConvGRU runs. Removed the duplicated mtype=='rand' clause and the
# redundant pd.concat around a single query.
tidydata = df.query("dataset=='wikimath' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==8 & epoch==50 & model=='GConvGRU'").copy()
tidydata = tidydata.sort_values(by=['model', 'mrate'], ascending=[True, True])

def assign_class_order(data):
    """Attach a 1-based within-method repetition counter as 'class_order'."""
    data['class_order'] = data.groupby('method').cumcount() + 1
    return data

# group_keys=False silences the pandas FutureWarning; result unchanged since
# the index is reset right after.
tidydata = tidydata.groupby('model', group_keys=False).apply(assign_class_order).reset_index(drop=True)
fig = px.scatter(tidydata, x='class_order', y="mse",color='mrate', trendline="ols",
                 trendline_color_override="grey", facet_col='method',
                 color_continuous_scale=px.colors.sequential.Bluered)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.layout.annotations[0].text = 'Classic'
fig.layout.annotations[1].text = 'Proposed'
fig.layout.xaxis.title.text = ''
fig.layout.xaxis2.title.text = ''
fig.layout.yaxis2.showticklabels = True
fig.update_layout(template="seaborn")
fig.update_layout(title_text="")
fig.update_layout(height=600, width=1800)
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
))
fig
FutureWarning:
Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use
>>> .groupby(..., group_keys=False)
To adopt the future behavior and silence this warning, use
>>> .groupby(..., group_keys=True)
# MontevideoBus ('monte') / GConvGRU runs (nearest-neighbour interpolation).
# Removed the duplicated mtype=='rand' clause and the redundant pd.concat.
tidydata = df.query("dataset=='monte' & mtype=='rand' & inter_method == 'nearest' & nof_filters==12 & lags==4 & epoch==50 & model=='GConvGRU'").copy()
tidydata = tidydata.sort_values(by=['model', 'mrate'], ascending=[True, True])

def assign_class_order(data):
    """Attach a 1-based within-method repetition counter as 'class_order'."""
    data['class_order'] = data.groupby('method').cumcount() + 1
    return data

# group_keys=False silences the pandas FutureWarning; result unchanged since
# the index is reset right after.
tidydata = tidydata.groupby('model', group_keys=False).apply(assign_class_order).reset_index(drop=True)
fig = px.scatter(tidydata, x='class_order', y="mse", color='mrate', trendline="ols",
                 trendline_color_override="grey", facet_col='method',
                 color_continuous_scale=px.colors.sequential.Bluered)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.layout.annotations[0].text = 'Classic'
fig.layout.annotations[1].text = 'Proposed'
fig.layout.xaxis.title.text = ''
fig.layout.xaxis2.title.text = ''
fig.layout.yaxis2.showticklabels = True
fig.update_layout(template="seaborn")
fig.update_layout(title_text="")
fig.update_layout(height=600, width=1800)
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
))
fig
FutureWarning:
Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use
>>> .groupby(..., group_keys=False)
To adopt the future behavior and silence this warning, use
>>> .groupby(..., group_keys=True)
7
time에 따라 node에 따라 mse 확인할 수 있는?-절댓값 크기로 mse 값을 color로
# --- Figure 7: MSE (bubble size) across datasets, laid out by time x nodes -----
# Each per-dataset block gathers every tuned model at the filter setting from
# the tuning table at the top of this file, then tags the rows with that
# dataset's node count and number of time steps so the final scatter can place
# every run on a (time, nodes) plane. EvolveGCNH / EvolveGCNO have no filter
# condition because they take no filter hyper-parameter.
fivenodes = pd.concat([
    df[(df['dataset']=='fivenodes') & (df['mtype']=='rand') & (df['inter_method']=='linear') & (df['nof_filters']==12) &
       (df['lags']==2) & (df['epoch']==50) & (df['model']=='GConvGRU') & (df['mrate'].isin([0.3, 0.5, 0.6, 0.7, 0.8]))],
    df[(df['dataset']=='fivenodes') & (df['mtype']=='rand') & (df['inter_method']=='linear') & (df['nof_filters']==12) &
       (df['lags']==2) & (df['epoch']==50) & (df['model']=='GConvLSTM') & (df['mrate'].isin([0.3, 0.5, 0.6, 0.7, 0.8]))],
    df[(df['dataset']=='fivenodes') & (df['mtype']=='rand') & (df['inter_method']=='linear') & (df['nof_filters']==4) &
       (df['lags']==2) & (df['epoch']==50) & (df['model']=='GCLSTM') & (df['mrate'].isin([0.3, 0.5, 0.6, 0.7, 0.8]))],
    df[(df['dataset']=='fivenodes') & (df['mtype']=='rand') & (df['inter_method']=='linear') & (df['nof_filters']==4) &
       (df['lags']==2) & (df['epoch']==50) & (df['model']=='LRGCN') & (df['mrate'].isin([0.3, 0.5, 0.6, 0.7, 0.8]))],
    df[(df['dataset']=='fivenodes') & (df['mtype']=='rand') & (df['inter_method']=='linear') & (df['nof_filters']==12) &
       (df['lags']==2) & (df['epoch']==50) & (df['model']=='DyGrEncoder') & (df['mrate'].isin([0.3, 0.5, 0.6, 0.7, 0.8]))],
    df[(df['dataset']=='fivenodes') & (df['mtype']=='rand') & (df['inter_method']=='linear') & (df['lags']==2) &
       (df['epoch']==50) & (df['model']=='EvolveGCNH') & (df['mrate'].isin([0.3, 0.5, 0.6, 0.7, 0.8]))],
    df[(df['dataset']=='fivenodes') & (df['mtype']=='rand') & (df['inter_method']=='linear') & (df['lags']==2) &
       (df['epoch']==50) & (df['model']=='EvolveGCNO') & (df['mrate'].isin([0.3, 0.5, 0.6, 0.7, 0.8]))],
    df[(df['dataset']=='fivenodes') & (df['mtype']=='rand') & (df['inter_method']=='linear') & (df['nof_filters']==12) &
       (df['lags']==2) & (df['epoch']==50) & (df['model']=='TGCN') & (df['mrate'].isin([0.3, 0.5, 0.6, 0.7, 0.8]))],
    df[(df['dataset']=='fivenodes') & (df['mtype']=='rand') & (df['inter_method']=='linear') & (df['nof_filters']==2) &
       (df['lags']==2) & (df['epoch']==50) & (df['model']=='DCRNN') & (df['mrate'].isin([0.3, 0.5, 0.6, 0.7, 0.8]))]
])
# FiveVTS: 5 nodes, 200 time steps.
fivenodes['nodes'] = 5
fivenodes['time'] = 200
chickenpox = pd.concat([
    df.query("dataset=='chickenpox' & mtype=='rand' & inter_method == 'linear' & nof_filters==16 & lags==4 & epoch==50 & model=='GConvGRU' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='chickenpox' & mtype=='rand' & inter_method == 'linear' & nof_filters==32 & lags==4 & epoch==50 & model=='GConvLSTM' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='chickenpox' & mtype=='rand' & inter_method == 'linear' & nof_filters==16 & lags==4 & epoch==50 & model=='GCLSTM' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='chickenpox' & mtype=='rand' & inter_method == 'linear' & nof_filters==8 & lags==4 & epoch==50 & model=='LRGCN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='chickenpox' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==4 & epoch==50 & model=='DyGrEncoder' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='chickenpox' & mtype=='rand' & inter_method == 'linear' & lags==4 & epoch==50 & model=='EvolveGCNH' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='chickenpox' & mtype=='rand' & inter_method == 'linear' & lags==4 & epoch==50 & model=='EvolveGCNO' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='chickenpox' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==4 & epoch==50 & model=='TGCN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='chickenpox' & mtype=='rand' & inter_method == 'linear' & nof_filters==16 & lags==4 & epoch==50 & model=='DCRNN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)")
])
# Chickenpox: 20 nodes, 522 time steps.
chickenpox['nodes'] = 20
chickenpox['time'] = 522
pedalme = pd.concat([
    df.query("dataset=='pedalme' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==4 & epoch==50 & model=='GConvGRU' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='pedalme' & mtype=='rand' & inter_method == 'linear' & nof_filters==2 & lags==4 & epoch==50 & model=='GConvLSTM' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='pedalme' & mtype=='rand' & inter_method == 'linear' & nof_filters==4 & lags==4 & epoch==50 & model=='GCLSTM' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='pedalme' & mtype=='rand' & inter_method == 'linear' & nof_filters==8 & lags==4 & epoch==50 & model=='LRGCN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='pedalme' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==4 & epoch==50 & model=='DyGrEncoder' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='pedalme' & mtype=='rand' & inter_method == 'linear' & lags==4 & epoch==50 & model=='EvolveGCNH' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='pedalme' & mtype=='rand' & inter_method == 'linear' & lags==4 & epoch==50 & model=='EvolveGCNO' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='pedalme' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==4 & epoch==50 & model=='TGCN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='pedalme' & mtype=='rand' & inter_method == 'linear' & nof_filters==8 & lags==4 & epoch==50 & model=='DCRNN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)")
], ignore_index=True)
# Pedalme: 15 nodes, only 36 time steps (the shortest series).
pedalme['nodes'] = 15
pedalme['time'] = 36
wikimath = pd.concat([
    df.query("dataset=='wikimath' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==8 & epoch==50 & model=='GConvGRU' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='wikimath' & mtype=='rand' & inter_method == 'linear' & nof_filters==64 & lags==8 & epoch==50 & model=='GConvLSTM' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='wikimath' & mtype=='rand' & inter_method == 'linear' & nof_filters==64 & lags==8 & epoch==50 & model=='GCLSTM' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='wikimath' & mtype=='rand' & inter_method == 'linear' & nof_filters==32 & lags==8 & epoch==50 & model=='LRGCN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='wikimath' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==8 & epoch==50 & model=='DyGrEncoder' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='wikimath' & mtype=='rand' & inter_method == 'linear' & lags==8 & epoch==50 & model=='EvolveGCNH' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='wikimath' & mtype=='rand' & inter_method == 'linear' & lags==8 & epoch==50 & model=='EvolveGCNO' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='wikimath' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==8 & epoch==50 & model=='TGCN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)"),
    df.query("dataset=='wikimath' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==8 & epoch==50 & model=='DCRNN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.8)")
], ignore_index=True)
# Wikimath: 1068 nodes, 731 time steps.
wikimath['nodes'] = 1068
wikimath['time'] = 731
# Windmillsmall tops out at mrate 0.7 (no 0.8 runs for this dataset).
windmillsmall = pd.concat([
    df.query("dataset=='windmillsmall' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==8 & epoch==50 & model=='GConvGRU' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.7)"),
    df.query("dataset=='windmillsmall' & mtype=='rand' & inter_method == 'linear' & nof_filters==16 & lags==8 & epoch==50 & model=='GConvLSTM' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.7)"),
    df.query("dataset=='windmillsmall' & mtype=='rand' & inter_method == 'linear' & nof_filters==16 & lags==8 & epoch==50 & model=='GCLSTM' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.7)"),
    df.query("dataset=='windmillsmall' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==8 & epoch==50 & model=='LRGCN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.7)"),
    df.query("dataset=='windmillsmall' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==8 & epoch==50 & model=='DyGrEncoder' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.7)"),
    df.query("dataset=='windmillsmall' & mtype=='rand' & inter_method == 'linear' & lags==8 & epoch==50 & model=='EvolveGCNH' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.7)"),
    df.query("dataset=='windmillsmall' & mtype=='rand' & inter_method == 'linear' & lags==8 & epoch==50 & model=='EvolveGCNO' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.7)"),
    df.query("dataset=='windmillsmall' & mtype=='rand' & inter_method == 'linear' & nof_filters==12 & lags==8 & epoch==50 & model=='TGCN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.7)"),
    df.query("dataset=='windmillsmall' & mtype=='rand' & inter_method == 'linear' & nof_filters==4 & lags==8 & epoch==50 & model=='DCRNN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.6 | mrate == 0.7)")
], ignore_index=True)
# Windmillsmall: 11 nodes, 17,472 time steps (the longest series).
windmillsmall['nodes'] = 11
windmillsmall['time'] = 17472
# MontevideoBus ('monte') uses nearest-neighbour interpolation and skips 0.6.
monte = pd.concat([
    df.query("dataset=='monte' & mtype=='rand' & inter_method == 'nearest' & nof_filters==12 & lags==4 & epoch==50 & model=='GConvGRU' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.7 | mrate == 0.8)"),
    df.query("dataset=='monte' & mtype=='rand' & inter_method == 'nearest' & nof_filters==12 & lags==4 & epoch==50 & model=='GConvLSTM' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.7 | mrate == 0.8)"),
    df.query("dataset=='monte' & mtype=='rand' & inter_method == 'nearest' & nof_filters==12 & lags==4 & epoch==50 & model=='GCLSTM' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.7 | mrate == 0.8)"),
    df.query("dataset=='monte' & mtype=='rand' & inter_method == 'nearest' & nof_filters==2 & lags==4 & epoch==50 & model=='LRGCN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.7 | mrate == 0.8)"),
    df.query("dataset=='monte' & mtype=='rand' & inter_method == 'nearest' & nof_filters==12 & lags==4 & epoch==50 & model=='DyGrEncoder' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.7 | mrate == 0.8)"),
    df.query("dataset=='monte' & mtype=='rand' & inter_method == 'nearest' & lags==4 & epoch==50 & model=='EvolveGCNH' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.7 | mrate == 0.8)"),
    df.query("dataset=='monte' & mtype=='rand' & inter_method == 'nearest' & lags==4 & epoch==50 & model=='EvolveGCNO' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.7 | mrate == 0.8)"),
    df.query("dataset=='monte' & mtype=='rand' & inter_method == 'nearest' & nof_filters==8 & lags==4 & epoch==50 & model=='TGCN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.7 | mrate == 0.8)"),
    df.query("dataset=='monte' & mtype=='rand' & inter_method == 'nearest' & nof_filters==12 & lags==4 & epoch==50 & model=='DCRNN' & (mrate == 0.3 | mrate == 0.5 | mrate == 0.7 | mrate == 0.8)")
], ignore_index=True)
# MontevideoBus: 675 nodes, 744 time steps.
monte['nodes'] = 675
monte['time'] = 744
tidydata = pd.concat([fivenodes, chickenpox, pedalme, wikimath,windmillsmall, monte])
# Ordered categoricals fix the model and method ordering across the figure.
tidydata['model'] = pd.Categorical(tidydata['model'], categories=["GConvGRU", "GConvLSTM", "GCLSTM", "LRGCN", "DyGrEncoder", "EvolveGCNH", "EvolveGCNO", "TGCN", "DCRNN"])
tidydata['method'] = pd.Categorical(tidydata['method'], categories=['STGCN', 'IT-STGCN'])
# Bubble size encodes |MSE|, color encodes the missing rate, one facet per
# method; log axes spread the datasets over the (time, nodes) plane.
fig = px.scatter(tidydata, x='time', y="nodes", log_x=True,log_y=True,color='mrate',size=np.abs(tidydata['mse']),facet_col='method',
                 color_continuous_scale=px.colors.sequential.Bluered)
# fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
# fig.layout.annotations[0].text = 'Classic'
# fig.layout.annotations[1].text = 'Proposed'
# fig.layout.xaxis.title.text = ''
# fig.layout.xaxis2.title.text = ''
# fig.layout.yaxis2.showticklabels = True
fig.update_layout(template="seaborn")
# fig.update_layout(title_text="")
fig.update_layout(height=1000, width=1800)
# fig.update_layout(legend=dict(
#     yanchor="top",
#     y=0.99,
#     xanchor="left",
#     x=0.01
# ))
fig